11  Enrichment Circos Bar

Tip

This script primarily carries out the reading and merging of transcriptomic data from tumor and normal samples, and processes this data through logarithmic transformation. Subsequently, it reads and handles differentially expressed gene data, performing multiple biological enrichment analyses, including biological processes, molecular functions, cellular components, KEGG pathways, disease ontology, and Reactome pathways. Finally, the script filters specific enrichment results and uses ggplot2 to create a circular bar chart to visualize these results, effectively displaying differences in gene expression across various categories. This provides valuable insights for biomedical research, especially in exploring the expression differences between tumors and normal tissues.

library(TransProR)
library(clusterProfiler)
library(org.Hs.eg.db)
library(DOSE)
library(ReactomePA)
library(dplyr)
library(grid)
library(ggplot2)
library(magick)
library(htmlwidgets)
library(webshot)
library(graphics)
library(cowplot)
library(pcutils)

11.1 Load data

tumor <- readRDS("../test_TransProR/generated_data1/removebatch_SKCM_Skin_TCGA_exp_tumor.rds")
normal <- readRDS('../test_TransProR/generated_data1/removebatch_SKCM_Skin_Normal_TCGA_GTEX_count.rds')
# Merge the datasets, ensuring both have genes as row names
all_count_exp <- merge(tumor, normal, by = "row.names")
all_count_exp <- tibble::column_to_rownames(all_count_exp, var = "Row.names")  # Set the row names

# Drawing data
all_count_exp <- log_transform(all_count_exp)
[1] "log2 transform finished"
DEG_deseq2 <- readRDS('../test_TransProR/Select DEGs/DEG_deseq2.Rdata')
#head(all_count_exp, 1)
head(DEG_deseq2, 5)
          baseMean log2FoldChange      lfcSE      stat pvalue padj change
A1BG      134.6084      -2.549682 0.05677219 -44.91075      0    0   down
A2M     30208.9912       3.251168 0.08394645  38.72907      0    0     up
AADACL2   801.4538      -8.229956 0.18969649 -43.38486      0    0   down
AARS2    1153.5493       1.624753 0.03283522  49.48202      0    0 stable
AARSD1    567.8672      -2.082289 0.02275703 -91.50088      0    0 stable

11.2 Convert from SYMBOL to ENTREZID

# Convert from SYMBOL to ENTREZID for convenient enrichment analysis later. It's crucial to do this now as a direct conversion may result in a reduced set of genes due to non-one-to-one correspondence.

# DEG_deseq2
# Retrieve gene list
gene <- rownames(DEG_deseq2)
# Perform conversion
gene = bitr(gene, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
'select()' returned 1:many mapping between keys and columns
Warning in bitr(gene, fromType = "SYMBOL", toType = "ENTREZID", OrgDb =
"org.Hs.eg.db"): 43.37% of input gene IDs are fail to map...
# Remove duplicates and merge
gene <- dplyr::distinct(gene, SYMBOL, .keep_all=TRUE)
# Extract the SYMBOL column as a vector from the first dataset
symbols_vector <- gene$SYMBOL
# Use the SYMBOL column to filter corresponding rows from the second dataset by row names
DEG_deseq2 <- DEG_deseq2[rownames(DEG_deseq2) %in% symbols_vector, ]
head(DEG_deseq2, 5)
          baseMean log2FoldChange      lfcSE      stat pvalue padj change
A1BG      134.6084      -2.549682 0.05677219 -44.91075      0    0   down
A2M     30208.9912       3.251168 0.08394645  38.72907      0    0     up
AADACL2   801.4538      -8.229956 0.18969649 -43.38486      0    0   down
AARS2    1153.5493       1.624753 0.03283522  49.48202      0    0 stable
AARSD1    567.8672      -2.082289 0.02275703 -91.50088      0    0 stable

11.3 Select differentially expressed genes

Diff_deseq2 <- filter_diff_genes(
  DEG_deseq2, 
  p_val_col = "pvalue", 
  log_fc_col = "log2FoldChange",
  p_val_threshold = 0.01, 
  log_fc_threshold = 8
  )
# First, obtain a list of gene names from the row names of the first dataset
gene_names <- rownames(Diff_deseq2)
# Find the matching rows in the second dataframe
matched_rows <- all_count_exp[gene_names, ]
# Calculate the mean for each row
averages <- rowMeans(matched_rows, na.rm = TRUE)
# Append the averages as a new column to the first dataframe
Diff_deseq2$average <- averages
Diff_deseq2$ID <- rownames(Diff_deseq2)
Diff_deseq2$changetype <- ifelse(Diff_deseq2$change == 'up', 1, -1)
# Define a small threshold value
small_value <- .Machine$double.xmin
# Before calculating -log10, replace zeroes with the small threshold value and assign it to a new column
Diff_deseq2$log_pvalue <- ifelse(Diff_deseq2$pvalue == 0, -log10(small_value), -log10(Diff_deseq2$pvalue))
# Extract the expression data corresponding to the differentially expressed genes
heatdata_deseq2 <- all_count_exp[rownames(Diff_deseq2), ]
#head(heatdata_deseq2, 1)

11.4 Diff_deseq2 Enrichment Analysis

11.4.1 Obtain the list of genes

gene <- rownames(Diff_deseq2)
## Convert
gene = bitr(gene, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
'select()' returned 1:1 mapping between keys and columns
## Remove duplicates and merge
gene <- dplyr::distinct(gene, SYMBOL, .keep_all=TRUE)
gene_df <- data.frame(logFC=Diff_deseq2$log2FoldChange,
                      SYMBOL = rownames(Diff_deseq2))
gene_df <- merge(gene_df, gene, by="SYMBOL")
GO_deseq2 <- gene_df$ENTREZID

11.4.2 GO Analysis for Biological Processes (BP)

# Perform gene enrichment analysis
erich.go.BP_deseq2 <- enrichGO(
  gene = GO_deseq2,
  OrgDb = org.Hs.eg.db,
  keyType = "ENTREZID",
  ont = 'BP', # Other categories: "CC", "MF" for molecular function
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05,
  readable = TRUE)
erich.go.BP.outdata_deseq2 <- as.data.frame(erich.go.BP_deseq2)
# Uncomment to write output to CSV
# write.csv(erich.go.BP.outdata_deseq2, "E:/fruit/erich.go.BP.outdata.csv")
head(erich.go.BP.outdata_deseq2, 5)
                   ID                                         Description
GO:0002377 GO:0002377                           immunoglobulin production
GO:0002440 GO:0002440 production of molecular mediator of immune response
GO:0016064 GO:0016064             immunoglobulin mediated immune response
GO:0019724 GO:0019724                            B cell mediated immunity
GO:0002449 GO:0002449                        lymphocyte mediated immunity
           GeneRatio   BgRatio       pvalue     p.adjust       qvalue
GO:0002377    34/179 196/18870 2.480604e-33 2.909749e-30 2.909749e-30
GO:0002440    34/179 314/18870 3.023121e-26 1.773060e-23 1.773060e-23
GO:0016064    29/179 205/18870 7.968166e-26 3.115553e-23 3.115553e-23
GO:0019724    29/179 208/18870 1.224253e-25 3.590121e-23 3.590121e-23
GO:0002449    30/179 368/18870 1.235430e-19 2.898319e-17 2.898319e-17
                                                                                                                                                                                                                                                                                                          geneID
GO:0002377 IGKV1-16/IGKV1-17/IGKV1-27/IGKV1-5/IGKV1-6/IGKV1-9/IGKV2D-29/IGKV3-15/IGKV3-20/IGKV3D-20/IGKV4-1/IGKV5-2/IGLV1-40/IGLV1-44/IGLV1-47/IGLV2-11/IGLV2-14/IGLV2-23/IGLV3-1/IGLV3-10/IGLV3-19/IGLV3-21/IGLV3-25/IGLV3-27/IGLV3-9/IGLV4-60/IGLV4-69/IGLV5-45/IGLV6-57/IGLV7-43/IGLV8-61/IGLV9-49/TREX1/XBP1
GO:0002440 IGKV1-16/IGKV1-17/IGKV1-27/IGKV1-5/IGKV1-6/IGKV1-9/IGKV2D-29/IGKV3-15/IGKV3-20/IGKV3D-20/IGKV4-1/IGKV5-2/IGLV1-40/IGLV1-44/IGLV1-47/IGLV2-11/IGLV2-14/IGLV2-23/IGLV3-1/IGLV3-10/IGLV3-19/IGLV3-21/IGLV3-25/IGLV3-27/IGLV3-9/IGLV4-60/IGLV4-69/IGLV5-45/IGLV6-57/IGLV7-43/IGLV8-61/IGLV9-49/TREX1/XBP1
GO:0016064                                                IGHG1/IGHG3/IGHM/IGHV1-18/IGHV1-24/IGHV1-58/IGHV1-69/IGHV1-69-2/IGHV2-26/IGHV3-11/IGHV3-13/IGHV3-15/IGHV3-21/IGHV3-23/IGHV3-30/IGHV3-43/IGHV3-48/IGHV3-49/IGHV3-53/IGHV3-66/IGHV3-73/IGHV4-28/IGHV4-31/IGHV4-34/IGHV4-39/IGHV4-59/IGHV5-51/IGLC1/TREX1
GO:0019724                                                IGHG1/IGHG3/IGHM/IGHV1-18/IGHV1-24/IGHV1-58/IGHV1-69/IGHV1-69-2/IGHV2-26/IGHV3-11/IGHV3-13/IGHV3-15/IGHV3-21/IGHV3-23/IGHV3-30/IGHV3-43/IGHV3-48/IGHV3-49/IGHV3-53/IGHV3-66/IGHV3-73/IGHV4-28/IGHV4-31/IGHV4-34/IGHV4-39/IGHV4-59/IGHV5-51/IGLC1/TREX1
GO:0002449                                         CLEC2A/IGHG1/IGHG3/IGHM/IGHV1-18/IGHV1-24/IGHV1-58/IGHV1-69/IGHV1-69-2/IGHV2-26/IGHV3-11/IGHV3-13/IGHV3-15/IGHV3-21/IGHV3-23/IGHV3-30/IGHV3-43/IGHV3-48/IGHV3-49/IGHV3-53/IGHV3-66/IGHV3-73/IGHV4-28/IGHV4-31/IGHV4-34/IGHV4-39/IGHV4-59/IGHV5-51/IGLC1/TREX1
           Count
GO:0002377    34
GO:0002440    34
GO:0016064    29
GO:0019724    29
GO:0002449    30

11.4.3 GO Analysis for Molecular Functions (MF)

# Perform gene enrichment analysis
erich.go.MF_deseq2 <- enrichGO(
  gene = GO_deseq2,
  OrgDb = org.Hs.eg.db,
  keyType = "ENTREZID",
  ont = 'MF', # Other categories: "CC", "MF" for molecular function
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05,
  readable = TRUE)

erich.go.MF.outdata_deseq2 <- as.data.frame(erich.go.MF_deseq2)
# Uncomment to write output to CSV
# write.csv(erich.go.MF.outdata_deseq2, "E:/fruit/erich.go.MF.outdata.csv")

head(erich.go.MF.outdata_deseq2, 5)
                   ID                              Description GeneRatio
GO:0003823 GO:0003823                          antigen binding    49/188
GO:0030280 GO:0030280 structural constituent of skin epidermis     7/188
GO:0034987 GO:0034987          immunoglobulin receptor binding     3/188
             BgRatio       pvalue     p.adjust       qvalue
GO:0003823 178/18496 1.931643e-57 4.829108e-55 4.829108e-55
GO:0030280  36/18496 6.521227e-08 8.151534e-06 8.151534e-06
GO:0034987  16/18496 5.249729e-04 4.374774e-02 4.374774e-02
                                                                                                                                                                                                                                                                                                                                                                                                                                            geneID
GO:0003823 IGHG1/IGHG3/IGHM/IGHV1-18/IGHV1-24/IGHV1-58/IGHV1-69/IGHV1-69-2/IGHV2-26/IGHV3-11/IGHV3-13/IGHV3-15/IGHV3-21/IGHV3-23/IGHV3-30/IGHV3-43/IGHV3-48/IGHV3-49/IGHV3-53/IGHV3-66/IGHV3-73/IGHV4-28/IGHV4-31/IGHV4-34/IGHV4-39/IGHV4-59/IGHV5-51/IGKV1-16/IGKV1-17/IGKV1-5/IGKV3-15/IGKV3-20/IGKV4-1/IGKV5-2/IGLC1/IGLV1-40/IGLV1-44/IGLV1-47/IGLV2-11/IGLV2-14/IGLV2-23/IGLV3-1/IGLV3-19/IGLV3-21/IGLV3-25/IGLV3-27/IGLV6-57/IGLV7-43/TRBV28
GO:0030280                                                                                                                                                                                                                                                                                                                                                                                             KRT2/KRT71/KRT73/KRT77/KRT83/KRT85/KRTAP1-3
GO:0034987                                                                                                                                                                                                                                                                                                                                                                                                                        IGHG1/IGHG3/IGHM
           Count
GO:0003823    49
GO:0030280     7
GO:0034987     3

11.4.4 GO Analysis for Cellular Components (CC)

# Perform gene enrichment analysis
erich.go.CC_deseq2 <- enrichGO(
  gene = GO_deseq2,
  OrgDb = org.Hs.eg.db,
  keyType = "ENTREZID",
  ont = 'CC', # Other categories: "CC", "MF" for molecular function
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.05,
  readable = TRUE)

erich.go.CC.outdata_deseq2 <- as.data.frame(erich.go.CC_deseq2)
# Uncomment to write output to CSV
# write.csv(erich.go.CC.outdata_deseq2, "E:/fruit/erich.go.CC.outdata.csv")

11.4.5 KEGG Analysis

kegg.out_deseq2 = enrichKEGG(
  gene = GO_deseq2,
  organism = "hsa",
  keyType = "kegg",
  pvalueCutoff = 0.15,
  pAdjustMethod = "BH",
  qvalueCutoff = 0.15)
Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
kegg.out.outdata_deseq2 <- as.data.frame(kegg.out_deseq2)
# Uncomment to export the data, which are in ENTREZID format
# write.csv(kegg.out.outdata_deseq2, "E:/kegg.out.outdata.csv")

##### Convert numeric Entrez gene IDs or Ensembl gene IDs from above code to symbols
library(org.Hs.eg.db)
kegg.out1_deseq2 = as.data.frame(kegg.out_deseq2)
ENTREZID = strsplit(kegg.out1_deseq2$geneID, "/")
symbol = sapply(ENTREZID, function(x) {
  y = bitr(x, fromType = "ENTREZID", toType = "SYMBOL", OrgDb = "org.Hs.eg.db")
  # In case of multiple matches, take the first one
  y = y[!duplicated(y$ENTREZID), -1]
  y = paste(y, collapse = "/")
})
'select()' returned 1:1 mapping between keys and columns
'select()' returned 1:1 mapping between keys and columns
'select()' returned 1:1 mapping between keys and columns
kegg.out1_deseq2$geneID = symbol
kegg.out1.outdata_deseq2 <- as.data.frame(kegg.out1_deseq2)
# Uncomment to export the converted data
# write.csv(kegg.out1.outdata_deseq2, "E:/fruit/kegg.out1.outdata.csv")
head(kegg.out.outdata_deseq2, 5)
                   category                   subcategory       ID
hsa05150     Human Diseases Infectious disease: bacterial hsa05150
hsa04915 Organismal Systems              Endocrine system hsa04915
hsa04970 Organismal Systems              Digestive system hsa04970
                             Description GeneRatio  BgRatio       pvalue
hsa05150 Staphylococcus aureus infection      6/43 100/8842 7.819914e-06
hsa04915      Estrogen signaling pathway      6/43 139/8842 5.118328e-05
hsa04970              Salivary secretion      4/43  97/8842 1.210284e-03
             p.adjust       qvalue                            geneID Count
hsa05150 0.0006255931 0.0005926672 147183/353288/3881/3883/3886/8687     6
hsa04915 0.0020473312 0.0019395769 147183/353288/3881/3883/3886/8687     6
hsa04970 0.0322742503 0.0305756056              54831/1473/3346/3347     4
head(kegg.out1.outdata_deseq2, 5)
                   category                   subcategory       ID
hsa05150     Human Diseases Infectious disease: bacterial hsa05150
hsa04915 Organismal Systems              Endocrine system hsa04915
hsa04970 Organismal Systems              Digestive system hsa04970
                             Description GeneRatio  BgRatio       pvalue
hsa05150 Staphylococcus aureus infection      6/43 100/8842 7.819914e-06
hsa04915      Estrogen signaling pathway      6/43 139/8842 5.118328e-05
hsa04970              Salivary secretion      4/43  97/8842 1.210284e-03
             p.adjust       qvalue                               geneID Count
hsa05150 0.0006255931 0.0005926672 KRT25/KRT26/KRT31/KRT33A/KRT35/KRT38     6
hsa04915 0.0020473312 0.0019395769 KRT25/KRT26/KRT31/KRT33A/KRT35/KRT38     6
hsa04970 0.0322742503 0.0305756056                 BEST2/CST5/HTN1/HTN3     4

11.4.6 DO Analysis

erich.go.DO_deseq2 = enrichDO(gene = GO_deseq2,
                              ont = "DO", # Other categories: "CC", "MF" for molecular function
                              pvalueCutoff = 0.5,
                              qvalueCutoff = 0.5,
                              readable = TRUE)

erich.go.DO.outdata_deseq2 <- as.data.frame(erich.go.DO_deseq2)
# Uncomment to export the data
# write.csv(erich.go.DO.outdata_deseq2, "E:/fruit/erich.go.DO.outdata.csv")
head(erich.go.DO.outdata_deseq2, 5)
                       ID                          Description GeneRatio
DOID:0060121 DOID:0060121 integumentary system benign neoplasm      3/71
DOID:3165       DOID:3165                 skin benign neoplasm      3/71
DOID:11132     DOID:11132                prostatic hypertrophy      3/71
DOID:421         DOID:421                         hair disease      4/71
DOID:2345       DOID:2345    plasma protein metabolism disease      2/71
              BgRatio      pvalue  p.adjust    qvalue                 geneID
DOID:0060121 27/10312 0.000812625 0.1178306 0.1178306     KRT31/KRT35/MAGEA4
DOID:3165    27/10312 0.000812625 0.1178306 0.1178306     KRT31/KRT35/MAGEA4
DOID:11132   34/10312 0.001606147 0.1552608 0.1552608   MAGEA1/MAGEA3/MAGEA4
DOID:421     81/10312 0.002303062 0.1669720 0.1669720 CDSN/KRT25/KRT71/KRT83
DOID:2345    16/10312 0.005269780 0.2547060 0.2547060         CPN1/SERPINA12
             Count
DOID:0060121     3
DOID:3165        3
DOID:11132       3
DOID:421         4
DOID:2345        2

11.4.7 Reactome Pathway Analysis

erich.go.Reactome_deseq2 <- enrichPathway(gene = GO_deseq2, pvalueCutoff = 0.05, readable = TRUE)

erich.go.Reactome.outdata_deseq2 <- as.data.frame(erich.go.Reactome_deseq2)
# Uncomment to export the data
# write.csv(erich.go.Reactome.outdata_deseq2, "E:/fruit/erich.go.Reactome.outdata.csv")
head(erich.go.Reactome.outdata_deseq2, 5)
                         ID                         Description GeneRatio
R-HSA-6805567 R-HSA-6805567                      Keratinization     50/97
R-HSA-6809371 R-HSA-6809371 Formation of the cornified envelope     16/97
                BgRatio       pvalue     p.adjust       qvalue
R-HSA-6805567 214/11009 3.658421e-61 6.694910e-59 6.694910e-59
R-HSA-6809371 129/11009 1.744227e-14 1.595968e-12 1.595968e-12
                                                                                                                                                                                                                                                                                                                                                                                                                                         geneID
R-HSA-6805567 CDSN/KRT2/KRT25/KRT26/KRT31/KRT33A/KRT35/KRT38/KRT71/KRT73/KRT77/KRT83/KRT85/KRTAP1-1/KRTAP1-3/KRTAP10-1/KRTAP10-10/KRTAP10-11/KRTAP10-3/KRTAP10-5/KRTAP10-6/KRTAP10-7/KRTAP10-8/KRTAP11-1/KRTAP12-1/KRTAP12-2/KRTAP16-1/KRTAP17-1/KRTAP2-1/KRTAP2-2/KRTAP2-4/KRTAP24-1/KRTAP26-1/KRTAP3-1/KRTAP3-2/KRTAP3-3/KRTAP4-2/KRTAP4-3/KRTAP4-4/KRTAP4-6/KRTAP4-8/KRTAP4-9/KRTAP8-1/KRTAP9-3/KRTAP9-4/KRTAP9-7/KRTAP9-8/LCE1E/LCE2B/LCE2C
R-HSA-6809371                                                                                                                                                                                                                                                                                                                                    CDSN/KRT2/KRT25/KRT26/KRT31/KRT33A/KRT35/KRT38/KRT71/KRT73/KRT77/KRT83/KRT85/LCE1E/LCE2B/LCE2C
              Count
R-HSA-6805567    50
R-HSA-6809371    16

11.4.8 Usage example

# Suppose you have a dataframe named data and a list of descriptions you want to filter
descriptions_to_filter <- c("immunoglobulin production", "B cell mediated immunity")
filtered_data_BP <- extract_descriptions_counts(erich.go.BP.outdata_deseq2, descriptions_to_filter, "#009688")
print(filtered_data_BP)
                         Description Count   color
GO:0002377 immunoglobulin production    34 #009688
GO:0019724  B cell mediated immunity    29 #009688
descriptions_to_filter <- c("immunoglobulin complex", "keratin filament", "IgG immunoglobulin complex")
filtered_data_CC <- extract_descriptions_counts(erich.go.CC.outdata_deseq2, descriptions_to_filter, "#3f51b5")
print(filtered_data_CC)
                          Description Count   color
GO:0019814     immunoglobulin complex    60 #3f51b5
GO:0045095           keratin filament    39 #3f51b5
GO:0071735 IgG immunoglobulin complex     3 #3f51b5
descriptions_to_filter <- c("integumentary system benign neoplasm", "prostatic hypertrophy")
filtered_data_DO <- extract_descriptions_counts(erich.go.DO.outdata_deseq2, descriptions_to_filter, "#673ab7")
print(filtered_data_DO)
                                      Description Count   color
DOID:0060121 integumentary system benign neoplasm     3 #673ab7
DOID:11132                  prostatic hypertrophy     3 #673ab7
descriptions_to_filter <- c("antigen binding", "immunoglobulin receptor binding")
filtered_data_MF <- extract_descriptions_counts(erich.go.MF.outdata_deseq2, descriptions_to_filter, "#4caf50")
print(filtered_data_MF)
                               Description Count   color
GO:0003823                 antigen binding    49 #4caf50
GO:0034987 immunoglobulin receptor binding     3 #4caf50
descriptions_to_filter <- c("Keratinization", "Formation of the cornified envelope")
filtered_data_Reactome <- extract_descriptions_counts(erich.go.Reactome.outdata_deseq2, descriptions_to_filter, "#ffc107")
print(filtered_data_Reactome)
                                      Description Count   color
R-HSA-6805567                      Keratinization    50 #ffc107
R-HSA-6809371 Formation of the cornified envelope    16 #ffc107
descriptions_to_filter <- c("Staphylococcus aureus infection", "Salivary secretion")
filtered_data_kegg <- extract_descriptions_counts(kegg.out1.outdata_deseq2, descriptions_to_filter, "#ff9800")
print(filtered_data_kegg)
                             Description Count   color
hsa05150 Staphylococcus aureus infection     6 #ff9800
hsa04970              Salivary secretion     4 #ff9800

11.5 enrich_circo_bar

data_list <- list(
  filtered_data_BP, 
  filtered_data_CC, 
  filtered_data_DO,
  filtered_data_MF, 
  filtered_data_Reactome, 
  filtered_data_kegg
)
combined_and_visualized_data <- enrich_circo_bar(data_list)

print(combined_and_visualized_data)

11.6 enrich_circo_bar and sunburst

11.6.1 data

descriptions_to_filter <- c("IgG immunoglobulin complex")
filtered_data_CC <- extract_descriptions_counts(erich.go.CC.outdata_deseq2, descriptions_to_filter, "#ff9800")
print(filtered_data_CC)
                          Description Count   color
GO:0071735 IgG immunoglobulin complex     3 #ff9800
descriptions_to_filter <- c("prostatic hypertrophy")
filtered_data_DO <- extract_descriptions_counts(erich.go.DO.outdata_deseq2, descriptions_to_filter, "#4caf50")
print(filtered_data_DO)
                     Description Count   color
DOID:11132 prostatic hypertrophy     3 #4caf50
descriptions_to_filter <- c("immunoglobulin receptor binding")
filtered_data_MF <- extract_descriptions_counts(erich.go.MF.outdata_deseq2, descriptions_to_filter, "#009688")
print(filtered_data_MF)
                               Description Count   color
GO:0034987 immunoglobulin receptor binding     3 #009688
descriptions_to_filter <- c("Staphylococcus aureus infection","Salivary secretion")
filtered_data_kegg <- extract_descriptions_counts(kegg.out1.outdata_deseq2, descriptions_to_filter, "#3f51b5")
print(filtered_data_kegg)
                             Description Count   color
hsa05150 Staphylococcus aureus infection     6 #3f51b5
hsa04970              Salivary secretion     4 #3f51b5
data_list <- list(
  filtered_data_kegg,
  filtered_data_MF,
  filtered_data_DO,
  filtered_data_CC
)



enrichment <- c ("enrichment","enrichment","enrichment",
                 "enrichment","enrichment","enrichment",
                 "enrichment","enrichment","enrichment",
                 "enrichment","enrichment","enrichment","enrichment",
                 "enrichment","enrichment","enrichment","enrichment","enrichment","enrichment")


methods1 <- c("go CC","go CC","go CC",
              "go DO","go DO","go DO",
              "go MF","go MF","go MF",
              "KEGG","KEGG","KEGG","KEGG","KEGG","KEGG","KEGG","KEGG","KEGG","KEGG")

pathways1 <- c("IgG immunoglobulin complex","IgG immunoglobulin complex","IgG immunoglobulin complex",
               "prostatic hypertrophy","prostatic hypertrophy","prostatic hypertrophy",
               "immunoglobulin receptor binding","immunoglobulin receptor binding","immunoglobulin receptor binding",
               "Salivary secretion","Salivary secretion","Salivary secretion","Salivary secretion",
               "Staphylococcus aureus infection","Staphylococcus aureus infection","Staphylococcus aureus infection",
               "Staphylococcus aureus infection","Staphylococcus aureus infection","Staphylococcus aureus infection")

change1 <- c("CC_up","CC_up", "CC_down",
             "DO_up","DO_up", "DO_up",
             "MF_up","MF_up", "MF_up",
             "KEGG1_down","KEGG1_up", "KEGG1_up", "KEGG1_up",
             "KEGG2_down","KEGG2_down","KEGG2_down","KEGG2_down","KEGG2_down","KEGG2_down")

genes1 <- c("CC IGHG1","CC IGHG3", "IGLC1",
            "MAGEA1", "MAGEA3", "MAGEA4",
            "MF IGHG1", "MF IGHG3", "MF IGHM",
            "BEST2","CST5","HTN1","HTN3",
            "KRT25","KRT26","KRT31","KRT33A","KRT35","KRT38")


logFC1<- c(8.79, 8.52, 8.55, 
           11.21, 11.28, 12.46, 
           8.79, 8.52, 8.77,
           8.66, 8.97,12.70,13.31,
           10.22, 8.31,8.48,8.42,9.21,8.43)


test1 <- data.frame(enrichment, methods1, pathways1, change1, genes1, logFC1)

# Custom colors
colors <- rev(c("#009688","#4caf50","#ff9800","#3f51b5"))

11.6.2 function

# Function to create the circular bar chart
enrich_circo_bar1 <- function(data_list, num_dummy) {
  # Combine data and arrange by Count
  combined_data <- dplyr::bind_rows(data_list)
  combined_data <- combined_data %>% dplyr::arrange(dplyr::desc(.data$Count)) %>%
    dplyr::mutate(id = dplyr::row_number())
  
  # Add dummy rows to create empty space in the center
  num_dummy <- num_dummy  # Number of dummy rows
  dummy_data <- data.frame(
    Description = rep("dummy", num_dummy),
    Count = rep(0, num_dummy),
    color = rep("#FFFFFF", num_dummy)  # White color for dummy bars
  )
  dummy_data <- dummy_data %>% dplyr::mutate(id = max(combined_data$id) + dplyr::row_number())
  
  # Combine real and dummy data
  combined_data <- dplyr::bind_rows(combined_data, dummy_data)
  
  # Mutate Description to a factor with levels in the order of appearance
  combined_data <- combined_data %>% dplyr::mutate(Description = factor(.data$Description, 
                                                                        levels = unique(.data$Description)))
  
  # Set fill colors, including dummy color
  fill_colors <- c(combined_data$color[match(levels(combined_data$Description), combined_data$Description)], "#FFFFFF")
  
  # Calculate maximum values for limits
  max_count <- max(combined_data$Count) + (max(combined_data$Count) / 5)
  max_id <- max(combined_data$id) + 1.5
  
  # Create plot
  p <- ggplot2::ggplot(combined_data, ggplot2::aes(x = .data$id, y = .data$Count, fill = .data$Description)) +
    ggplot2::geom_bar(stat = "identity", width = 0.7) +
    ggplot2::geom_text(ggplot2::aes(x = .data$id, y = 0, label = ifelse(.data$Description == "dummy", "", as.character(.data$Description)), 
                                    color = .data$Description), 
                       hjust = 1.03, size = 3.5) +  # Remove color assignment from here
    ggplot2::scale_fill_manual(values = fill_colors, guide = "none") +
    ggplot2::scale_color_manual(values = fill_colors, guide = "none") +  # Assign colors to text
    ggplot2::scale_y_continuous(expand = c(0, 0), limits = c(0, max_count), position = "right") +
    ggplot2::scale_x_reverse(expand = c(0, 0), limits = c(max_id, -0.1)) +  # Reverse the x-axis
    ggplot2::coord_polar(theta = "y", start = 0) +
    ggplot2::labs(title = "Enrichment CircularBar Chart", 
                  subtitle = "Including: BP/MF/CC/DO/KEGG/Reactome") +
    ggplot2::theme_minimal() +
    ggplot2::theme(plot.background = ggplot2::element_rect(fill = "white", color = "white"), 
                   axis.title = ggplot2::element_blank(), axis.text = ggplot2::element_blank())
  
  return(p)
}




my_sunburst1 <- function (test, custom_colors = NULL, ...) 
{
  test <- as.data.frame(test)
  if (length(unique(test[, 1])) > 1) {
    test <- cbind(Root = " ", test)
  }
  nc <- ncol(test)
  if (nc < 3) 
    stop("as least 3-columns dataframe")
  if (!is.numeric(test[, nc])) 
    stop("the last column must be numeric")
  
  lib_ps("plotly", library = FALSE)
  target <- source <- weight <- NULL
  links <- data.frame()
  for (i in 1:(nc - 2)) {
    tmp <- test[, c(i, i + 1, nc)]
    colnames(tmp) <- c("source", "target", "weight")
    tmp <- dplyr::group_by(tmp, source, target) %>% dplyr::summarise(weight = sum(weight), 
                                                                     .groups = "keep")
    links <- rbind(links, tmp)
  }
  
  if (is.null(custom_colors)) {
    # Use default colors if no custom colors provided
    fig <- plotly::plot_ly(labels = links$target, parents = links$source, 
                           values = links$weight, text = links$weight, type = "sunburst", 
                           ...)
  } else {
    # Use custom colors
    fig <- plotly::plot_ly(labels = links$target, parents = links$source, 
                           values = links$weight, text = links$weight, type = "sunburst", 
                           marker = list(colors = custom_colors), ...)
  }
  
  fig
}

11.6.3 bar

# Call the function to create the plot
combined_and_visualized_data <- enrich_circo_bar1(data_list, 15)

# Print the plot
print(combined_and_visualized_data)

11.6.4 sunburst

# Create the sunburst plot with custom colors
fig <- my_sunburst1(test1, custom_colors = colors)

# Display the plot
fig

11.6.5 combined_plot

# Convert ggplot to image
circular_bar_image <- image_graph(width = 1000, height = 1000, res = 96)
print(combined_and_visualized_data)
dev.off()
png 
  2 
# Save the sunburst chart as HTML and convert it to an image
html_file <- tempfile(fileext = ".html")
png_file <- tempfile(fileext = ".png")
saveWidget(fig, html_file, selfcontained = TRUE)
webshot(html_file, png_file, vwidth = 800, vheight = 800, cliprect = c(0, 0, 800, 800))

# Read the saved image
img <- image_read(png_file)

# Create a circular mask
mask <- image_draw(image_blank(800, 800, color = "none"))
symbols(400, 400, circles = 400, inches = FALSE, add = TRUE, fg = "grey50", bg = "grey50")
dev.off()
png 
  2 
# Apply the mask to generate a circular image
img_circular <- image_composite(img, mask, operator = "copyopacity")

# Convert the generated circular bar chart and circular sunburst chart to ggplot objects
circular_bar_plot <- ggdraw() + draw_image(circular_bar_image, scale = 1)
sunburst_plot <- ggdraw() + draw_image(img_circular, scale = 0.5)

# Overlay the sunburst chart at the center of the circular bar chart
combined_plot <- circular_bar_plot + draw_plot(sunburst_plot, x = -0.02, y = -0.02, width = 1, height = 1)

# Print the combined plot
print(combined_plot)

11.7 Reference

  • ggplot2:

github:ggplot2

An implementation of the Grammar of Graphics in R